# Load library
library(Seurat)
library(monocle)
library(princurve)
library(cluster)
library(parallel)
library(ggplot2)
library(ggExtra)
library(dplyr)
library(reshape)
library(gridExtra)
library(RColorBrewer)
library(wesanderson)
library(viridis)
#Set ggplot theme as classic
theme_set(theme_classic())We perform Kmeans clustering on the 2 cell state scores :
APBPset.seed(100)
#K-means clustering based on AP, BP scores across cells
cl <- kmeans(cbind(Allcells.data@meta.data$AP_signature1, Allcells.data@meta.data$BP_signature1), 3)
Allcells.data@meta.data$kmeanClust <- paste0("Clust.",cl$cluster)col.pal <- wes_palette("GrandBudapest1", 3, type = "discrete")
p1 <- ggplot(Allcells.data@meta.data, aes(x=AP_signature1, y=BP_signature1, colour = kmeanClust)) +
scale_color_manual(values=col.pal) +
geom_point() +
theme(legend.position="none")
ggMarginal(p1, type = "histogram", fill="lightgrey") ; rm(p1)DimPlot(Allcells.data,
group.by = "kmeanClust",
reduction.use = "spring",
cols.use = col.pal,
dim.1 = 1,
dim.2 = 2,
do.label=T,
label.size = 4,
no.legend = F)We then extract the glutamatergic neuron branche as beeing the Kmeans cluster with the highest mean Apical progenitor signature
#Fin cluster wiht the highest mean APscore
MeanKclust.APscore <- aggregate(AP_signature1 ~ kmeanClust, Allcells.data@meta.data, mean)
APclust <- MeanKclust.APscore %>% filter(AP_signature1 == max(AP_signature1)) %>% pull(kmeanClust)
#Extract apical progenitors
barcodes <- Allcells.data@meta.data %>% filter(kmeanClust == APclust) %>% pull(Barcodes)
AP.data <- SubsetData(Allcells.data, cells.use = barcodes , subset.raw = T, do.clean = F)
#Further filter the 3 outlier cells based on spring coordinates
cells <- rownames(AP.data@dr$spring@cell.embeddings[AP.data@dr$spring@cell.embeddings[,2] > 250,])
AP.data <- SubsetData(AP.data, cells.use = cells , subset.raw = T, do.clean = F)
DimPlot(AP.data,
group.by = "kmeanClust",
reduction.use = "spring",
cols.use = col.pal,
dim.1 = 1,
dim.2 = 2,
do.label=T,
label.size = 4,
no.legend = F)#Remove non epressed genes
num.cells <- Matrix::rowSums(AP.data@data > 0)
genes.use <- names(x = num.cells[which(x = num.cells >= 20)])
AP.data@raw.data <- AP.data@raw.data[genes.use, ]
AP.data@data <- AP.data@data[genes.use, ]
#Normalize and Scale the data
AP.data <- NormalizeData(object = AP.data,
normalization.method = "LogNormalize",
scale.factor = round(median(AP.data@meta.data$nUMI)),
display.progress = F)
AP.data <- FindVariableGenes(object = AP.data,
mean.function = ExpMean,
dispersion.function = LogVMR,
x.low.cutoff = 0.0125,
x.high.cutoff = 3,
y.cutoff = 1, do.plot = F,
display.progress = F)
AP.data <- ScaleData(object = AP.data, vars.to.regress = c("CC.Difference","percent.mito", "nUMI"), display.progress = F)Cell cycle associated genes were excluded for PCA dimensionality reduction and Spring plot was generating with these parameter :
Number of cells: 1648
Number of genes that passed filter: 857
Min expressing cells (gene filtering): 3
Min number of UMIs (gene filtering): 3
Gene variability %ile (gene filtering): 90
Number of principal components: 7
Number of nearest neighbors: 20
Number of force layout iterations: 500
#Import Spring coordinates calculated without cell cycle genes
Coordinates <- read.table("./Progenitors/E12.AP.Coordinates.txt", sep=",", header = F)[,c(2,3)]
rownames(Coordinates) <- rownames(AP.data@meta.data)
AP.data <- SetDimReduction(AP.data,
reduction.type = "spring.AP",
slot = "cell.embeddings",
new.data = as.matrix(Coordinates))
AP.data@dr$spring.AP@key <- "spring.AP"
colnames(AP.data@dr$spring.AP@cell.embeddings) <- paste0(GetDimReduction(object = AP.data, reduction.type = "spring.AP",slot = "key"), c(1,2))data <- data.frame(springAP.1 = AP.data@dr$spring.AP@cell.embeddings[,1],
springAP.2 = AP.data@dr$spring.AP@cell.embeddings[,2],
spring1 = AP.data@dr$spring@cell.embeddings[,1],
spring2 = AP.data@dr$spring@cell.embeddings[,2])
# Fit the principal curve
fit <- principal_curve(as.matrix(data[,1:2]),
smoother='lowess',
trace=TRUE,
f = 0.7,
stretch=0,
plot_iterations = F)## Starting curve---distance^2: 2022449778
## Iteration 1---distance^2: 745172
## Iteration 2---distance^2: 696270.5
## Iteration 3---distance^2: 677509.1
## Iteration 4---distance^2: 668251.7
## Iteration 5---distance^2: 663393.6
## Iteration 6---distance^2: 660928.6
## Iteration 7---distance^2: 659586.4
## Iteration 8---distance^2: 658917.8
## Iteration 9---distance^2: 658638.8
DorsoVentral.Score <- fit$lambda/max(fit$lambda) #The actual speudotime
pc.line <- as.data.frame(fit$s[order(fit$lambda),]) #The principal curve smoothed
data$Phase <- as.character(AP.data@meta.data$Phase)
data$DorsoVentral.Score <- DorsoVentral.Score
# Direction of the maturation score using Zbtb20 expression (reverte if positive correlation)
if (cor(data$DorsoVentral.Score, AP.data@data['Zbtb20', ]) > 0) { data$DorsoVentral.Score <- -(data$DorsoVentral.Score - max(data$DorsoVentral.Score))}
AP.data@meta.data$DorsoVentral.Score <- data$DorsoVentral.Score#Plot Cell onto PC1 and PC2 with principal curve
ggplot(data, aes(springAP.1, springAP.2)) +
geom_point(aes(color=Phase), size=2, shape=16, values=col.pal) +
geom_line(data=pc.line, color='red', size=0.77)#Plot Speudotime color gradient on the cell cycle filtered Spring embbeding
ggplot(data, aes(springAP.1, springAP.2)) +
geom_point(aes(color=DorsoVentral.Score), size=2, shape=16) +
scale_color_viridis(direction = -1) +
geom_line(data=pc.line, color='red', size=0.77)Manuscript Fig. 5A
#Plot Speudotime color gradient on the Spring embbeding calculated from full dataset
ggplot(data, aes(spring1, spring2)) +
geom_point(aes(color=DorsoVentral.Score), size=2, shape=16) +
scale_color_viridis(direction = -1) # Transfert metadata
meta.data <- data.frame(barcode = rownames(AP.data@meta.data),
Cluster = AP.data@meta.data$old.ident,
DorsoVentral.Score = AP.data@meta.data$DorsoVentral.Score,
CellcyclePhase = AP.data@meta.data$Phase,
row.names = rownames(AP.data@meta.data))
Annot.data <- new('AnnotatedDataFrame', data = meta.data)
# Transfert count data
count.data = data.frame(gene_short_name = rownames(AP.data@raw.data),
row.names = rownames(AP.data@raw.data))
feature.data <- new('AnnotatedDataFrame', data = count.data)
# Create the CellDataSet object
gbm_cds <- newCellDataSet(as.matrix(AP.data@raw.data),
phenoData = Annot.data,
featureData = feature.data,
lowerDetectionLimit = 1,
expressionFamily = negbinomial())# Exclude cell cycle associated genes
CCgenes <- as.character(read.table("./Progenitors/CellCycleGenes.csv", sep = "\t", header = F)[,1])
Input.genes <- AP.data@var.genes[!AP.data@var.genes %in% CCgenes]# Perform the test for differential expression as a function of pseudo-DV score while controling for cell cycle phase
DV.Axis.genes <- differentialGeneTest(gbm_cds[Input.genes,],
fullModelFormulaStr = "~sm.ns(DorsoVentral.Score, df = 3)*CellcyclePhase",
reducedModelFormulaStr = "~CellcyclePhase",
cores = detectCores() -2)
# Filter genes with a FDR < 0.001
DV.Axis.genes.FDR.filtered <- DV.Axis.genes %>% filter(qval < 1e-3)# Create a new pseudo-DV vector of 500 points
nPoints <- 500
new_data <- data.frame(DorsoVentral.Score = seq(min(pData(gbm_cds)$DorsoVentral.Score), max(pData(gbm_cds)$DorsoVentral.Score), length.out = nPoints))
# Smooth gene expression
Smooth.curve.matrix <- genSmoothCurves(gbm_cds[as.character(DV.Axis.genes.FDR.filtered$gene_short_name),],
trend_formula = "~sm.ns(DorsoVentral.Score, df = 3)",
relative_expr = TRUE,
new_data = new_data,
cores= detectCores() - 2)set.seed(100)
# Cluster genes using the Partitioning Around Medoids algorithm
DV.Axis.genes.clusters <- pam(as.dist((1-cor(Matrix::t(Smooth.curve.matrix), method = "spearman"))), k=9)
table(DV.Axis.genes.clusters$clustering)##
## 1 2 3 4 5 6 7 8 9
## 122 82 104 42 50 45 21 23 48
# Store the results
Gene.dynamique <- data.frame(Gene= names(DV.Axis.genes.clusters$clustering),
pval= DV.Axis.genes.FDR.filtered$pval,
qval=DV.Axis.genes.FDR.filtered$qval,
num_cells_expressed=DV.Axis.genes.FDR.filtered$num_cells_expressed,
Waves = DV.Axis.genes.clusters$clustering) %>% arrange(Waves)
row.names(Gene.dynamique) <- Gene.dynamique$Gene
Gene.dynamique$Gene.Clusters <- paste0("Clust.",Gene.dynamique$Waves)
write.table(Gene.dynamique, "./Progenitors/Gene.dynamique.csv", sep = ";", quote = F)#Plot gene clusters trends
Clusters.trend(AP.data,
Which.cluster = 1:9,
clust.list = DV.Axis.genes.clusters,
group.by = "global",
span = 1,
Smooth.method = "auto",
Use.scale.data = T)Manuscript Fig. 5C
set.seed(100)
# Cluster cells using the Partitioning Around Medoids algorithm
Cells.Clust <- pam(as.dist((1 - cor(Smooth.curve.matrix ,method = "spearman"))), k=7)
Domaines.Clust <- data.frame(Domaines = paste0("Clust.",Cells.Clust$clustering))# Re-order gene expression cluster along the axis
Sorted.gene.dyn <- Gene.dynamique %>% arrange(factor(Gene.Clusters, levels = paste0("Clust.",c(7,2,6,3,1,9,5,4,8))))
rownames(Sorted.gene.dyn) <- Sorted.gene.dyn$Gene
anno.colors <- list(Domaines = c(Clust.1="#83c3b8", Clust.2="#009fda", Clust.3="#3e69ac", Clust.4="#e46b6b", Clust.5="#e3c148", Clust.6="#b7d174", Clust.7="#68b041"),
Gene.Clusters = c(Clust.1 ="#cc3a1b" , Clust.2="#046c9a", Clust.3="#e7823a", Clust.4="#cc8778", Clust.5="#68b041", Clust.6="#5ab793", Clust.7="#e3c148", Clust.8="#e46B6b", Clust.9="#b79f0b"))
pheatmap::pheatmap(Smooth.curve.matrix[as.character(Sorted.gene.dyn$Gene),],
scale = "row",
cluster_rows = F,
cluster_cols = F,
gaps_col = cumsum(as.numeric(table(Domaines.Clust$Domaines))),
gaps_row = cumsum(as.numeric(table(Sorted.gene.dyn$Gene.Clusters)[paste0("Clust.",c(7,2,6,3,1,9,5,4,8))])) ,
annotation_row = Sorted.gene.dyn %>% dplyr::select(Gene.Clusters),
annotation_col = Domaines.Clust,
annotation_colors = anno.colors,
show_colnames = F,
show_rownames = T,
fontsize_row = 2,
color = rev(brewer.pal(11,"RdBu")),
breaks = seq(-2.5,2.5, length.out = 11),
main = "Genes expression along Dorso-Ventral axis")Manuscript Fig. 5C
We assign domain identity based on clusters’ transcriptional profile by setting boundaries over pseudo-dv score
# Set the boundary over speudotime score
new_data$cluster <- Domaines.Clust$Domaines
Infered.Domain.boundary <- aggregate(DorsoVentral.Score ~ cluster, new_data, max) %>% pull(DorsoVentral.Score)
# Assign identity based on the position of the cell on the pseudo-dv axis
Domaine.Ident <- sapply(AP.data@meta.data$DorsoVentral.Score,
function(x){ if(x<Infered.Domain.boundary[1]){ x = "Sub.Pallium.1"
} else if(x> Infered.Domain.boundary[1] & x< Infered.Domain.boundary[2]){ x ="Sub.Pallium.2"
} else if(x> Infered.Domain.boundary[2] & x< Infered.Domain.boundary[3]){ x = "Sub.Pallium.3"
} else if(x> Infered.Domain.boundary[3] & x< Infered.Domain.boundary[4]){ x = "Ventral.Pallium"
} else if(x> Infered.Domain.boundary[4] & x< Infered.Domain.boundary[5]){ x = "lateral.Pallium.1"
} else if(x> Infered.Domain.boundary[6]){ x = "Dorsal.Pallium"
} else x="lateral.Pallium.2"})
# Transfert the identity to the Seurat object
AP.data@meta.data$Domaine <- Domaine.Ident
AP.data <- SetAllIdent(AP.data, id = "Domaine")Plot.Genes.trend(AP.data,
genes = c("Gsx2", "Dbx1", "Gm29260", "Tfap2c", "Emx1", "Lrrn1"),
Use.scale.data = F)Manuscript Fig. 5D
DimPlot(AP.data,
group.by = "Domaine",
reduction.use = "spring",
dim.1 = 1,
dim.2 = 2,
do.label=F,
label.size = 4,
no.legend = F,
cols.use = tolower(c("#68B041", "#E3C148", "#B7D174", "#83C3B8", "#009FDA", "#3E69AC", "#E46B6B"))
)#load full dataset
Allcells.data <- readRDS("./QC.filtered.cells.RDS")
#Transfer the identities
Rename.Clust <- function(Clustdata, RawQCdata) {
unClustered.cells <- RawQCdata@meta.data$Barcodes
RawQCdata <- SetIdent(RawQCdata, cells.use = unClustered.cells, ident.use = "All.Unclustered.Cells")
for(i in unique(Clustdata@meta.data$Domaine)){
New.ident <- i
Barcodes <- rownames(subset(Clustdata@meta.data, Clustdata@meta.data$Domaine == i))
print(paste0("Cluster_",i,": ",length(Barcodes), " Cells"))
Barcodes <- Barcodes[Barcodes %in% rownames(RawQCdata@meta.data)]
RawQCdata <- SetIdent(RawQCdata, cells.use = Barcodes ,ident.use = paste0("AP.",i))
}
return(RawQCdata)
}
Allcells.data <- Rename.Clust(Clustdata = AP.data, RawQCdata = Allcells.data)## [1] "Cluster_Sub.Pallium.2: 363 Cells"
## [1] "Cluster_Ventral.Pallium: 312 Cells"
## [1] "Cluster_Sub.Pallium.3: 197 Cells"
## [1] "Cluster_lateral.Pallium.1: 219 Cells"
## [1] "Cluster_Dorsal.Pallium: 201 Cells"
## [1] "Cluster_lateral.Pallium.2: 113 Cells"
## [1] "Cluster_Sub.Pallium.1: 241 Cells"
colors <- c("#969696",tolower(c("#68B041", "#E3C148", "#B7D174", "#83C3B8", "#009FDA", "#3E69AC", "#E46B6B")))
DimPlot(Allcells.data,
reduction.use = "spring",
dim.1 = 1,
dim.2 = 2,
do.label=T,
label.size = 2,
no.legend = T,
cols.use = colors)Manuscript Fig. 5B
Manuscript Fig.S6A
Manuscript Fig.S6B
Manuscript Fig.S6C
Manuscript Fig.S6D
Manuscript Fig.S6E
Manuscript Fig.S6F
Manuscript Fig.S6G
#load full dataset
Allcells.data <- readRDS("./Clustered.cells.RDS")
#Transfer the identities
Rename.Clust <- function(Clustdata, RawQCdata) {
for(i in unique(Clustdata@meta.data$Domaine)){
New.ident <- i
Barcodes <- rownames(subset(Clustdata@meta.data, Clustdata@meta.data$Domaine == i))
print(paste0("Cluster_",i,": ",length(Barcodes), " Cells"))
Barcodes <- Barcodes[Barcodes %in% rownames(RawQCdata@meta.data)]
RawQCdata <- SetIdent(RawQCdata, cells.use = Barcodes ,ident.use = paste0("AP.",i))
}
return(RawQCdata)
}
Allcells.data <- Rename.Clust(Clustdata = AP.data, RawQCdata = Allcells.data)## [1] "Cluster_Sub.Pallium.2: 363 Cells"
## [1] "Cluster_Ventral.Pallium: 312 Cells"
## [1] "Cluster_Sub.Pallium.3: 197 Cells"
## [1] "Cluster_lateral.Pallium.1: 219 Cells"
## [1] "Cluster_Dorsal.Pallium: 201 Cells"
## [1] "Cluster_lateral.Pallium.2: 113 Cells"
## [1] "Cluster_Sub.Pallium.1: 241 Cells"
colors2 <- c("#969696",
tolower(c("#68B041", "#E3C148", "#B7D174", "#83C3B8", "#009FDA", "#3E69AC", "#E46B6B")),
"#ec756d", "#c773a7", "#7293c8", "#b79f0b", "#3ca73f","#31b6bd",
"#ebcb2e", "#9ec22f", "#a9961b", "#cc3a1b", "#cc8778" , "#d14c8d", "#4cabdc", "#5ab793", "#e7823a","#e6bb9b", "#046c9a", "#4784a2" , "#4990c9")
DimPlot(Allcells.data,
reduction.use = "spring",
dim.1 = 1,
dim.2 = 2,
do.label=T,
label.size = 2,
no.legend = T,
cols.use = colors2)Manuscript Fig. 2A
## [1] "05 novembre, 2020, 11,24"
## R version 3.6.3 (2020-02-29)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 18.04.5 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/atlas/libblas.so.3.10.3
## LAPACK: /usr/lib/x86_64-linux-gnu/atlas/liblapack.so.3.10.3
##
## locale:
## [1] LC_CTYPE=fr_FR.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=fr_FR.UTF-8 LC_COLLATE=fr_FR.UTF-8
## [5] LC_MONETARY=fr_FR.UTF-8 LC_MESSAGES=fr_FR.UTF-8
## [7] LC_PAPER=fr_FR.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=fr_FR.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] splines stats4 parallel stats graphics grDevices utils
## [8] datasets methods base
##
## other attached packages:
## [1] viridis_0.5.1 viridisLite_0.3.0 wesanderson_0.3.6
## [4] RColorBrewer_1.1-2 gridExtra_2.3 reshape_0.8.8
## [7] dplyr_0.8.3 ggExtra_0.9 cluster_2.1.0
## [10] princurve_2.1.4 monocle_2.14.0 DDRTree_0.1.5
## [13] irlba_2.3.3 VGAM_1.1-2 Biobase_2.46.0
## [16] BiocGenerics_0.32.0 Seurat_2.3.4 Matrix_1.2-17
## [19] cowplot_1.0.0 ggplot2_3.2.1
##
## loaded via a namespace (and not attached):
## [1] snow_0.4-3 backports_1.1.5 Hmisc_4.3-0
## [4] plyr_1.8.4 igraph_1.2.5 lazyeval_0.2.2
## [7] densityClust_0.3 fastICA_1.2-2 digest_0.6.25
## [10] foreach_1.4.7 htmltools_0.5.0 lars_1.2
## [13] gdata_2.18.0 magrittr_1.5 checkmate_1.9.4
## [16] mixtools_1.1.0 ROCR_1.0-7 limma_3.42.0
## [19] matrixStats_0.55.0 R.utils_2.9.0 docopt_0.6.1
## [22] colorspace_1.4-1 ggrepel_0.8.1 xfun_0.18
## [25] sparsesvd_0.2 crayon_1.3.4 jsonlite_1.7.0
## [28] zeallot_0.1.0 survival_2.44-1.1 zoo_1.8-6
## [31] iterators_1.0.12 ape_5.3 glue_1.4.1
## [34] gtable_0.3.0 kernlab_0.9-29 prabclus_2.3-1
## [37] DEoptimR_1.0-8 scales_1.1.0 pheatmap_1.0.12
## [40] bibtex_0.4.2 miniUI_0.1.1.1 Rcpp_1.0.5
## [43] metap_1.1 dtw_1.21-3 xtable_1.8-4
## [46] htmlTable_1.13.2 reticulate_1.13 foreign_0.8-72
## [49] bit_4.0.4 proxy_0.4-23 mclust_5.4.5
## [52] SDMTools_1.1-221.1 Formula_1.2-3 tsne_0.1-3
## [55] htmlwidgets_1.5.1 httr_1.4.1 FNN_1.1.3
## [58] gplots_3.0.1.1 fpc_2.2-3 acepack_1.4.1
## [61] modeltools_0.2-22 ica_1.0-2 farver_2.0.1
## [64] pkgconfig_2.0.3 R.methodsS3_1.7.1 flexmix_2.3-15
## [67] nnet_7.3-14 labeling_0.3 tidyselect_0.2.5
## [70] rlang_0.4.7 reshape2_1.4.3 later_1.0.0
## [73] munsell_0.5.0 tools_3.6.3 ggridges_0.5.1
## [76] fastmap_1.0.1 evaluate_0.14 stringr_1.4.0
## [79] yaml_2.2.1 npsurv_0.4-0 knitr_1.26
## [82] bit64_4.0.2 fitdistrplus_1.0-14 robustbase_0.93-5
## [85] caTools_1.17.1.2 purrr_0.3.3 RANN_2.6.1
## [88] pbapply_1.4-2 nlme_3.1-141 mime_0.7
## [91] slam_0.1-46 R.oo_1.23.0 hdf5r_1.3.2.9000
## [94] compiler_3.6.3 rstudioapi_0.11 png_0.1-7
## [97] lsei_1.2-0 tibble_2.1.3 stringi_1.4.6
## [100] highr_0.8 lattice_0.20-41 HSMMSingleCell_1.6.0
## [103] vctrs_0.2.0 pillar_1.4.2 lifecycle_0.1.0
## [106] combinat_0.0-8 Rdpack_0.11-0 lmtest_0.9-37
## [109] data.table_1.12.6 bitops_1.0-6 gbRd_0.4-11
## [112] httpuv_1.5.2 R6_2.4.1 latticeExtra_0.6-28
## [115] promises_1.1.0 KernSmooth_2.23-15 codetools_0.2-16
## [118] MASS_7.3-53 gtools_3.8.1 assertthat_0.2.1
## [121] withr_2.1.2 qlcMatrix_0.9.7 mgcv_1.8-33
## [124] diptest_0.75-7 doSNOW_1.0.18 grid_3.6.3
## [127] rpart_4.1-15 tidyr_1.0.0 class_7.3-17
## [130] rmarkdown_2.5 segmented_1.0-0 Rtsne_0.15
## [133] shiny_1.4.0 base64enc_0.1-3
Institute of Psychiatry and Neuroscience of Paris, INSERM U1266, 75014, Paris, France↩